# -*- coding: utf-8 -*-
# @File : blog_spider.py
# @Time : 2024/6/18 15:25
# @Author : syq
# @Email : 1721169065@qq.com
# @Software: PyCharm
import requests
from bs4 import BeautifulSoup

# Target listing pages of the cnblogs.com front page, "#pN" meaning page N.
# NOTE(review): "#pN" is a URL *fragment* — requests strips it before sending,
# so every one of these URLs fetches identical server content; confirm the
# real pagination endpoint before relying on per-page results.
urls = [f"https://www.cnblogs.com/#p{page_no}" for page_no in range(1, 51)]
# Producer side of the pipeline: fetches raw HTML for one URL.
def craw(url, timeout=10.0):
    """Download a page and return its HTML text.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for connect/read before giving up
            (default 10.0). Without it requests.get can block forever.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On network failure or timeout.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of handing garbage HTML to parse().
    r.raise_for_status()
    return r.text
# Consumer side: pulls the title of every article out of a listing page.
def parse(html):
    """Extract (href, title) pairs for each post on a cnblogs listing page.

    Post titles are anchor tags carrying class="post-item-title".
    """
    soup = BeautifulSoup(html, "html.parser")
    results = []
    for anchor in soup.find_all("a", class_="post-item-title"):
        results.append((anchor["href"], anchor.get_text()))
    return results
if __name__ == '__main__':
    # Smoke test: fetch one sample page and print every (href, title) found.
    sample_url = urls[2]
    for href, title in parse(craw(sample_url)):
        print((href, title))
